package com.hymake.processor;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.hymake.model.Clazz;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.utils.HttpConstant;

/**
 * 厦门海迈建设培训开班信息
 * 
 */
@Component
public class ClazzProcessor implements PageProcessor {
	
    @Autowired
    private Pipeline clazzDaoPipeline;
    
    public static final String URL_LIST = "http://www.xmjspx.cn/ajax/ajaxLoadModuleDom_h.jsp";

	private Site site = Site.me();

    public void process(Page page) {
    	List<Clazz> clazzs = new ArrayList<Clazz>();
    	List<String> titles = page.getHtml().xpath("//a[@class='J_mixNewsStyleTitle fk-newsListTitle mixNewsStyleTitle  pic-mixNewsStyleTitle']/text()").all();
    	List<String> urls = page.getHtml().xpath("//a[@class='J_mixNewsStyleTitle fk-newsListTitle mixNewsStyleTitle  pic-mixNewsStyleTitle']/@href").all();
    	List<String> dates = page.getHtml().xpath("//span[@class='mixNewsStyleDate fk-newsListDate']/text()").all();
//    	String totalPage = page.getHtml().xpath("//div[@id='pagenation458']").toString();
//    	String nextPage = page.getHtml().xpath("//div[@class='pageNext']/text()").toString();
    	
    	int index = 0;
    	for (String date : dates) {
    		String url = urls.get(index);
    		String title = titles.get(index);
    		Clazz clazz = new Clazz();
    		clazz.setUrl("http://www.xmjspx.cn/" + url.substring(2, url.length()-2)); // http://www.xmjspx.cn\"nd.jsp?id=235#_np=106_458\"
    		try {
    			clazz.setTitle(title.substring(0, title.length()-12));
			} catch (Exception e) {
				e.printStackTrace();
			}
    		
    		clazz.setPublishTime(date.substring(0, date.length()-8));
    		clazzs.add(clazz);
    		index++;
		}
//            
        page.putField("repo", clazzs);

    }
    
    public Site getSite() {
        return site;
    }
	
	public void crawl(PageProcessor crawler) {
		Spider spider = Spider.create(crawler);//
		spider.addPipeline(clazzDaoPipeline)//
				.thread(1);//
				
		
		for (int i = 1; i <= 2; i++) {
        	Request request = setRequest(i);
        	spider.addRequest(request);
		}
		
		spider.run();
	}

	private Request setRequest(int pageNum) {
		Request request = new Request("http://www.xmjspx.cn/ajax/ajaxLoadModuleDom_h.jsp");
		request.setMethod(HttpConstant.Method.POST);
		Map<String, Object> params = new HashMap<String, Object>();
		params.put("_colId", 106);
		params.put("_extId", 0);
		params.put("cmd", "getAjaxPageModuleInfo");
		params.put("href", "/col.jsp?id=106&m458pageno="+pageNum+"#fai_458_top");
		params.put("moduleId", 458);
		request.setRequestBody(HttpRequestBody.form(params , "utf-8"));
		return request;
	}

}
